import os
import pandas as pd
import geopandas as gpd
import rasterio
import seaborn as sns

import utils_agg, pvwater_agg, lcmap_agg
import data_dir


if __name__ == '__main__':

    merged = []
    for year in [2011, 2012, 2013, 2014, 2015, 2016]:
        file_name = data_dir.lcmap_vs_pvwater_agg[year]
        # check for file existence, load if it exists
        if os.path.isfile(file_name):
            print(f'Loading {year:d} merged data.')
            df = gpd.read_file(file_name)
        else:
            print(f'Creating {year:d} merged data.')

            # READ LCMAP SHAPEFILE
            print('Reading LCMAP data...')
            # df_l = gpd.read_file(data_dir.lcmap[year])
            df_l = lcmap_agg.load(year=year, file_name=data_dir.lcmap[year])
            # quickly drop by subsetting on county (to improve efficiency)
            # df_l = df_l.loc[df_l['County'].isin(['Monterey', 'Santa Cruz']), :]
            # rename for consistency
            # df_l.rename({f'Crop{year}': 'lcmap'}, axis=1, inplace=True)
            # subset columns to be used
            df_l = df_l.loc[:, ['lcmap', 'geometry']]
            # drop empty and missing
            df_l = df_l.loc[~(df_l['geometry'].is_empty | df_l['geometry'].isna()), :]

            # READ PVWATER SHAPEFILE
            print('Reading PVWater data...')
            df_p = pvwater_agg.load(year=year, file_name=data_dir.pvwater[year])

            # PREPARE TO MERGE
            print('Preparing to merge...')
            # reproject to UTM zone 10
            # this makes sure that coordinates are in meters
            df_l = df_l.to_crs(epsg=26910)
            df_p = df_p.to_crs(epsg=26910)
            # quickly drop by subsetting on bounding box (to improve efficiency)
            xmin, ymin, xmax, ymax = df_p.total_bounds
            df_l = df_l.cx[xmin:xmax, ymin:ymax]
            print('Merging...')
            # merge the two datasets by using geopandas.overlay
            # this loops over all geometries and
            # identifies matches when the two polygons intersect
            df = gpd.overlay(df_l, df_p, how='intersection')
            # drop small geometries that are results of overlay
            # drop areas with 4000 sq meters or below
            # this is roughly an acre
            df = df.loc[df.geometry.area > 4000, :]
            # save the file, named by the year
            df.to_file(file_name)

        merged.append(df.loc[:, ['lcmap', 'pvwater']])

    merged = pd.concat(merged)

    # SAVE CSV
    df = pd.crosstab(merged['lcmap'], merged['pvwater'])
    df.to_csv(data_dir.lcmap_vs_pvwater_csv_agg)

    # PLOT FIGURE
    # plot the cross tabulation
    utils_agg.plot_crosstab(
        input_df=merged, x_key='pvwater', y_key='lcmap',
        x_title='PVWMA Land Use Type',
        y_title='LCMAP Land Use Type',
        file_name=data_dir.lcmap_vs_pvwater_fig_agg,
        vmin=0, vmax=100,
        drop_threshold=50,
        drop_strs=['Ag. Unknown'],
        x_order=[
            'Developed',
            'Cropland',
            'Natural',
            'Other',
        ],
        y_order=[
            'Developed',
            'Cropland',
            'Natural',
            'Other',
        ],
    )

    utils_agg.plot_crosstab(
        input_df=merged, y_key='pvwater', x_key='lcmap',
        y_title='PVWMA Land Use Type',
        x_title='LCMAP Land Use Type',
        file_name=data_dir.lcmap_vs_pvwater_fig_agg_flip,
        vmin=0, vmax=100,
        drop_threshold=50,
        drop_strs=['Ag. Unknown'],
        y_order=[
            'Developed',
            'Cropland',
            'Natural',
            'Other',
        ],
        x_order=[
            'Developed',
            'Cropland',
            'Natural',
            'Other',
        ],
    )

